#!/bin/bash
# Per-list configuration. The four arrays below are parallel: element i of
# each one belongs to FILES[i]. The cleaning loop further down passes
# HEAD_EXP[i], TAIL_EXP[i] and IGNORE_LINES[i] to ./clean.py as its first,
# second and third positional argument, in that order.

# Base names of the lists. Later steps also use "<name>.utf8" and
# "<name>.clean" derived from these.
FILES=(
    'genres.list'
    'movies.list'
    'actors.list'
    'actresses.list'
    'directors.list'
    'writers.list'
    'producers.list'
)

# First argument for ./clean.py.
# NOTE(review): presumably a pattern locating the end of each list's header;
# confirm against clean.py. Single quotes keep the backslashes and the
# trailing '$' literal.
HEAD_EXP=(
    '8: THE GENRES LIST'
    'MOVIES LIST'
    '^(-+)[\t ]+(-+)[\t ]*\n$'
    '^(-+)[\t ]+(-+)[\t ]*\n$'
    '^(-+)[\t ]+(-+)[\t ]*\n$'
    '^(-+)[\t ]+(-+)[\t ]*\n$'
    '^(-+)[\t ]+(-+)[\t ]*\n$'
)

# Second argument for ./clean.py.
# NOTE(review): presumably a pattern locating the start of each list's
# footer; confirm against clean.py.
TAIL_EXP=(
    '^[\t ]+\n$'
    '^(-+)[\t ]*\n$'
    '^(-+)[\t ]*\n$'
    '^(-+)[\t ]*\n$'
    '^(-+)[\t ]*\n$'
    '^(-+)[\t ]*\n$'
    '^(-+)[\t ]*\n$'
)

# Third argument for ./clean.py.
# NOTE(review): looks like a count of lines to skip; confirm against clean.py.
IGNORE_LINES=(
    2
    2
    0
    0
    0
    0
    0
)

# Step 1: run ./get_lists.sh with every entry of FILES as an argument and
# abort the whole script if it reports failure.
echo "Obtaining lists from IMDB..."

# The array expansion is quoted so each file name reaches the helper as
# exactly one argument, with no word splitting or glob expansion.
if ! ./get_lists.sh "${FILES[@]}"; then
    # Diagnostics go to stderr so they do not mix with progress output.
    echo "ERROR: Lists could not been obtained!" >&2
    exit 1
fi

# Step 2: run ./toutf8.sh with every entry of FILES as an argument and abort
# the whole script if it reports failure. The cleaning step afterwards reads
# "<name>.utf8" for each list.
# NOTE(review): presumably toutf8.sh is what creates those files; confirm.
echo "Encoding lists to UTF-8..."

# The array expansion is quoted so each file name reaches the helper as
# exactly one argument, with no word splitting or glob expansion.
if ! ./toutf8.sh "${FILES[@]}"; then
    # Diagnostics go to stderr so they do not mix with progress output.
    echo "ERROR: Lists could not been encoded to UTF-8!" >&2
    exit 1
fi

# Step 3: run ./clean.py once per list. For index l it receives HEAD_EXP[l],
# TAIL_EXP[l] and IGNORE_LINES[l] as arguments, reads "${FILES[l]}.utf8" on
# stdin and has its stdout written to "${FILES[l]}.clean". The first failure
# (including a failed redirection) aborts the whole script.
echo "Cleaning lists..."

# An arithmetic for-loop replaces the manual counter and the deprecated
# $[...] arithmetic form.
for ((l = 0; l < ${#FILES[@]}; l++)); do
    if ! ./clean.py "${HEAD_EXP[l]}" "${TAIL_EXP[l]}" "${IGNORE_LINES[l]}" \
        < "${FILES[l]}.utf8" > "${FILES[l]}.clean"; then
        # Diagnostics go to stderr so they do not mix with progress output.
        echo "ERROR: Lists could not been cleaned!" >&2
        exit 1
    fi
done

# Step 4: delete the intermediate files of every list -- the file named by
# the FILES entry itself and its ".utf8" companion. The ".clean" outputs are
# not touched.
for f in "${FILES[@]}"; do
    # Quoted operands plus "--" keep unusual file names (spaces, glob
    # characters, a leading dash) from being split or parsed as rm options.
    rm -f -- "$f" "$f.utf8"
done